package cn.datawin.task.pipe;

import java.util.Date;
import java.util.Map;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import cn.datawin.spider.page.Page;
import cn.datawin.spider.pipeline.PipeLine;
import cn.datawin.spider.processor.Processor;
import cn.datawin.task.Statistic;
import cn.datawin.task.Task;
import cn.datawin.task.dao.DbUtil;

/**
 * Pipeline stage that stores the parameters extracted from a crawled page as
 * one document in the task's MongoDB data collection.
 */
public class DataPipeLine implements PipeLine{

	/**
	 * Persists the parameters produced by {@code process}.
	 * <p>
	 * Steps, in order:
	 * <ol>
	 * <li>returns without storing anything when the parameter map is
	 *     {@code null} or empty;</li>
	 * <li>adds the page URL under {@code "url"} and the current time under
	 *     {@code "date"};</li>
	 * <li>merges the task's extra data (if any) into the map;</li>
	 * <li>hands every {@code byte[]} value to {@code DbUtil.saveImg} and
	 *     replaces it with the returned string;</li>
	 * <li>inserts the resulting map into the collection named
	 *     {@code task.getWorkbeach() + "data"}.</li>
	 * </ol>
	 * The parameter map returned by {@code process.getParams()} is modified in
	 * place. Whatever happens, the task is reported to {@link Statistic} once
	 * per call; a failure while doing so is logged and does not propagate.
	 *
	 * @param process processor holding the fetched page, the extracted
	 *                parameters and the owning task
	 */
	@Override
	public void pipe(Processor process) {
		Task task = null;
		try{
			// Resolve the task before anything can return early or throw, so
			// the finally block counts the real task instead of null.
			task = (Task)process.getTask();
			Page page = process.getPage();
			Map<String, Object> map = process.getParams();
			if(map==null||map.isEmpty()) return;
			System.out.println("data...");
			String collectionName = task.getWorkbeach()+"data";
			DBCollection collection=DbUtil.getcoll(collectionName);
			String url = page.getHttpRequest().getUrl();
			map.put("url", url);
			map.put("date", new Date());
			// The extra data is a map (it is passed to putAll), so a null check
			// is the only meaningful guard here.
			if (task.getExtedata()!=null) {
				map.putAll(task.getExtedata());
			}
			// Iterate over entries and write through Entry.setValue instead of
			// calling map.put while walking the key set.
			for(Map.Entry<String, Object> entry: map.entrySet()){
				Object value = entry.getValue();
				if(value instanceof byte[]){
					// NOTE(review): the file name depends on the page URL only, so
					// several byte[] fields of one page share the same name; check
					// whether DbUtil.saveImg overwrites in that case.
					String imgurl=DbUtil.saveImg(collectionName,(byte[])value , imageFileName(url));
					entry.setValue(imgurl);
				}
			}
			collection.insert(new BasicDBObject(map));
		}finally{
			try {
				Statistic.getInstance().addCount(task);
			} catch (Exception e) {
				// Statistics are best effort: report the failure, but never let
				// it mask an exception thrown by the main body.
				System.err.println("DataPipeLine: failed to update statistics: "+e);
			}
		}
	}

	/**
	 * Builds the image file name for a page URL by removing every
	 * {@code '/'}, {@code ':'} and {@code '.'} and appending {@code ".jpg"}.
	 *
	 * @param url page URL the image was extracted from
	 * @return the URL without those characters, followed by {@code ".jpg"}
	 */
	private static String imageFileName(String url) {
		return url.replaceAll("[/:.]", "")+".jpg";
	}
}
